We sample over distributions of replicates

N= 10,000


In [1]:
library(data.table)
library(foreach)
library(doParallel)


Loading required package: iterators
Loading required package: parallel

In [2]:
GBD <- read.table("../Data/DALY_YLL_deaths_per_region_and_27_diseases_2005.txt")
GBD <- GBD[order(GBD$metr,GBD$Region,GBD$Disease),]

In [3]:
#Burden of all diseases in all regions for each metric of burden
GBD <- rbind(GBD,data.frame(metr=levels(GBD$metr),
           Region="All",
           Disease="all",
           burden=tapply(GBD$burden[GBD$Region=="All"],GBD$metr[GBD$Region=="All"],sum)))
rownames(GBD) <- NULL
GBD <- GBD[order(GBD$metr,GBD$Region,GBD$Disease),]

#Burden in Non-HI
DNHI <- GBD[GBD$Region=="All",]
DNHI$burden <- DNHI$burden - GBD$burden[GBD$Region=="High-income"]
DNHI$Region <- "Non-HI"
GBD <- rbind(GBD,DNHI)
GBD$Region <- as.factor(as.character(GBD$Region))
GBD <- GBD[order(GBD$metr,GBD$Region,GBD$Disease),]

#Local proportions of burden across diseases for each metric of burden
GBD$Prp <- 100*GBD$burden/rep(GBD$burden[GBD$Disease=="all"],as.numeric(table(paste(GBD$metr,GBD$Region))))

GBD <- data.table(GBD)

In [4]:
Mgbd <- read.table("../Data/27_gbd_groups.txt")
sms <- list.files("/media/igna/Elements/HotelDieu/Cochrane/MappingRCTs_vs_Burden/Replicates/Metrics_over_repl/")
dis <- as.numeric(substr(sms,25,nchar(sms)-4))
dis <- dis[dis!=0]

In [5]:
#We upload all replicates
#We will measure alignment only over diseases for which we have replicates

L <- list()
for(i in 1:length(dis)){
    k <- dis[i]
    DF <- fread(paste(c("/media/igna/Elements/HotelDieu/Cochrane/MappingRCTs_vs_Burden/Replicates/Metrics_over_repl/Metrics_over_replicates_",
                    as.character(k),".txt"),collapse=""))

    regs <- unique(DF$Region)
    L[[i]] <- data.table(
    Disease=Mgbd$x[k],
    Region=DF$Region[DF$Dis=="dis"],
    Prop_RCTs=100*DF$RCTs[DF$Dis=="dis"]/DF$RCTs[DF$Dis=="all"],
    Prop_Patients=100*DF$Patients[DF$Dis=="dis"]/DF$Patients[DF$Dis=="all"],
    sim_nb = rep(1:((nrow(DF)/length(regs))/2),each=length(regs)))
    
    }

DT <- rbindlist(L)

#replicate per disease
DT$k <- paste(DT$Disease,DT$sim_nb)
#nb replicates available per disease
NS <- DT[,max(sim_nb),by="Disease"]

In [6]:
NK <- 10000
set.seed(1234)
#For each disease, we sample 10,000 replicates, giving local proportions of research
sims <- do.call('rbind',tapply(NS$V1,NS$Disease,function(x){sample(1:x,NK,replace=TRUE)}))

In [7]:
cl<-makeCluster(4)
registerDoParallel(cl)
t0 <- proc.time()
A <- foreach(k = 1:NK, .packages="data.table") %dopar% {
    x <- sims[,k]
    dtt <- merge(GBD,DT[DT$k%in%paste(rownames(sims),x),])
    dtt[,.(sum(abs(Prp[Prp>=Prop_RCTs]-Prop_RCTs[Prp>=Prop_RCTs])),
       sum(abs(Prp[Prp>=2*Prop_RCTs]/2-Prop_RCTs[Prp>=2*Prop_RCTs])),
       sum(abs(Prp[Prp>=Prop_Patients]-Prop_Patients[Prp>=Prop_Patients])),
       sum(abs(Prp[Prp>=2*Prop_Patients]/2-Prop_RCTs[Prp>=2*Prop_Patients]))),
        by=.(metr,Region)]
    }
stopCluster(cl)
((proc.time() - t0)/60)


      user     system    elapsed 
 0.1820333  0.0120500 12.7565333 

In [8]:
Al <- rbindlist(A)

In [9]:
head(Al)


metrRegionV1V2V3V4
1daly All 30.14612618668298.5330737950202322.78503374888253.60760939635803
2death All 41.51162879059146.6149408722981329.10677956915091.34425726601647
3yld All 37.97109766674477.7099361032392243.78434484074746.71194328228305
4yll All 45.948656223718414.520517093118534.309287704015511.9746418906407
5daly Central Europe, Eastern Europe, and Central Asia31.0720417002882 6.00432894281371 25.1423527596846 1.00476603628302
6death Central Europe, Eastern Europe, and Central Asia53.2621863102998 19.7299773036951 48.5961498250461 19.7299773036951

In [10]:
AUI <- Al[,lapply(.SD,function(x){quantile(x,probs=c(0.025,0.5,0.975))}),by=.(metr,Region),.SDcols=c("V1","V2","V3","V4")]
AUI$UI <- rep(c("low","med","up"),times=length(unique(paste(AUI$metr,AUI$Region))))
AUI <- AUI[,c("UI","Region","metr","V1","V2","V3","V4")]
names(AUI) <- c("UI","Region","metr","RCTs_fill","RCTs_nogap","Patients_fill","Patients_nogap")

In [11]:
write.table(AUI,"../Data/Alignment_ratios_within_regions_across_diseases_wt_sims_patients_metrs_burdens.txt")

In [ ]: